Implementing Azure Vision OCR

by Steve Jefferson, Sept 8th, 2016

Azure Computer Vision includes Optical Character Recognition (OCR) capabilities. You can use the Read API to extract printed and handwritten text from images and documents. It uses deep learning based models and works with text on a variety of surfaces and backgrounds. These include business documents, invoices, receipts, posters, business cards, letters, and whiteboards. The OCR APIs support extracting printed text in several languages. Follow a quickstart to get started.

Azure configuration

  1. Register for an account on the Microsoft Azure cloud platform. The Azure free account includes access to several Azure products that are free for 12 months.
  2. Create a Cognitive Services resource.
  3. Make a note of the API key and the endpoint, which are located under "Keys and Endpoint".

Demo

Code Breakdown

Importing required libraries

 
import os
import json
import time
from requests import get,post
            

Setting Global Variables

 
# Working directory at start-up; the script at the end of the file joins it
# with the image and output file names.
# NOTE: this name shadows the built-in dir(); it is kept because later code
# refers to it.
dir=os.getcwd() 
# Subscription key sent in the 'Ocp-Apim-Subscription-Key' request header.
# Replace the placeholder with the key from the Azure portal.
API_KEY = '<< YOURAPIKEY >>' 
            

Image Handler


def handler(pathToImage):
    """Submit an image to the Azure Read API and return the analysis result.

    The image is uploaded as raw bytes. The service replies with an
    ``Operation-Location`` header holding the URL to poll for the result,
    and that URL is handed to ``get_endpoint``.

    Args:
        pathToImage: Path of the image file to analyse.

    Returns:
        Whatever ``get_endpoint`` returns for the polling URL.

    Raises:
        KeyError: If the response has no ``Operation-Location`` header,
            e.g. because the service rejected the request. The message
            carries the HTTP status and response body.
        OSError: If the image file cannot be opened or read.
    """
    ENDPOINT = 'https://iit-ocr.cognitiveservices.azure.com/vision/v3.1/read/analyze'

    print('Processing: ' + pathToImage)
    headers = {
        'Ocp-Apim-Subscription-Key': API_KEY,
        'Content-Type': 'application/octet-stream',
    }
    # The language hint travels in the query string. It used to be passed as
    # ``json=``, which requests ignores whenever ``data=`` is also supplied,
    # so it never reached the service.
    params = {
        'language': 'en',
    }
    # Close the file as soon as its bytes are in memory.
    with open(pathToImage, 'rb') as image_file:
        payload = image_file.read()

    response = post(ENDPOINT, headers=headers, params=params, data=payload)
    if 'Operation-Location' not in response.headers:
        # Same exception type as the plain header lookup would raise, but
        # with enough context to diagnose a rejected request.
        raise KeyError(
            'Operation-Location header missing (HTTP %s): %s'
            % (response.status_code, response.text)
        )

    return get_endpoint(response.headers['Operation-Location'])
          

Getting Asynchronous Endpoint


def get_endpoint(end_url):
    """Poll an analysis-result URL until the operation finishes.

    Makes up to 15 GET requests. Between requests it sleeps, starting at
    5 seconds and doubling each time up to a cap of 60 seconds.

    Args:
        end_url: URL to poll (the ``Operation-Location`` value returned when
            the image was submitted).

    Returns:
        A string in every case:
        * ``str()`` of the decoded JSON response when its ``status`` is
          ``"succeeded"``;
        * ``"Analysis failed:..."`` when ``status`` is ``"failed"``;
        * ``"GET analyze results failed:..."`` for a non-200 response, for
          any exception raised while requesting or decoding, or when no
          final status was reached within the allowed attempts.
    """
    n_tries = 15
    wait_sec = 5
    max_wait_sec = 60
    headers = {"Ocp-Apim-Subscription-Key": API_KEY}

    for n_try in range(n_tries):
        try:
            print(n_try, wait_sec)
            resp = get(url=end_url, headers=headers)
            resp_json = resp.json()
            if resp.status_code != 200:
                # Stop here: an error body has no usable "status" field, so
                # report the body itself rather than a bare KeyError text.
                msg = "GET analyze results failed:\n%s" % json.dumps(resp_json)
                print(msg)
                return msg

            status = resp_json["status"]
            if status == "succeeded":
                print("Analysis succeeded:\n")
                return str(resp_json)
            if status == "failed":
                return "Analysis failed:\n%s" % json.dumps(resp_json)
        except Exception as e:
            # Network errors, undecodable bodies and unexpected payload
            # shapes are all reported to the caller as a message.
            return "GET analyze results failed:\n%s" % str(e)

        # Analysis still running. Wait and retry with a longer delay.
        time.sleep(wait_sec)
        wait_sec = min(2 * wait_sec, max_wait_sec)

    # Every attempt was used without reaching a final status; say so
    # explicitly instead of falling off the end and returning None.
    return "GET analyze results failed:\nno result after %d attempts" % n_tries

            

Extracting Text From Response



def parse_text(results):
    """Flatten a Read API result dict into plain text.

    Walks ``results['analyzeResult']['readResults']``. For every entry in
    each page's ``'lines'`` list, the ``'text'`` of each word is emitted
    followed by a single space, and the line is terminated with a newline.

    Args:
        results: Decoded analysis result containing the keys named above.

    Returns:
        The concatenated text; an empty string when there are no lines.
    """
    pages = results['analyzeResult']['readResults']

    rendered_lines = []
    for page in pages:
        for ocr_line in page['lines']:
            # Every word keeps its trailing space, including the last one.
            words = ''.join(entry['text'] + ' ' for entry in ocr_line['words'])
            rendered_lines.append(words + '\n')
    return ''.join(rendered_lines)

            

Visualisation Code


  
def plot_rectangle(orig,x1,y1,confidence):
    """Draw a rectangle on an image, coloured by recognition confidence.

    NOTE: relies on ``cv2`` (OpenCV) being in scope; the import list shown
    with this code does not include it.

    Args:
        orig: Image passed to ``cv2.rectangle``.
        x1: One corner of the rectangle as an ``(x, y)`` pair.
        y1: The opposite corner as an ``(x, y)`` pair.
        confidence: Score compared against the 0.9 and 0.7 thresholds.

    Returns:
        The value returned by ``cv2.rectangle``.
    """
    # Colours are (B, G, R) tuples, the channel order OpenCV uses for images
    # it loads.
    if confidence > 0.9:
        col = (0, 255, 0)      # green: high confidence
    elif confidence > 0.7:
        # Was the same green as the top tier, which made this branch
        # indistinguishable from it; yellow marks the middle tier.
        col = (0, 255, 255)
    else:
        col = (0, 0, 255)      # red: low confidence
    return cv2.rectangle(orig, x1, y1, col, 5)

def plot(json_file, img_file='sign.jpg'):
    """Show an image with a box drawn around every recognised word.

    Only the first entry of ``readResults`` (the first page) is drawn. Each
    word's box is coloured by ``plot_rectangle`` according to its
    confidence, the boxed image is blended with an untouched copy, and the
    result is displayed in a window until a key is pressed.

    NOTE: relies on ``cv2`` (OpenCV) being in scope; the import list shown
    with this code does not include it.

    Args:
        json_file: Decoded analysis result (a dict) containing
            ``['analyzeResult']['readResults'][0]['lines']``.
        img_file: Path of the image the result belongs to. Defaults to
            ``'sign.jpg'``, the name that was previously hard-coded.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_file``.
    """
    alpha = 0.4
    orig = cv2.imread(img_file)
    if orig is None:
        # cv2.imread signals an unreadable file by returning None rather
        # than raising, so fail here with a clear message.
        raise FileNotFoundError('Could not read image: %s' % img_file)
    copy = orig.copy()

    lines = json_file['analyzeResult']['readResults'][0]['lines']

    for line in lines:
        for box in line['words']:
            arr = box['boundingBox']
            confidence = box['confidence']
            # Two opposite corners picked out of the flat coordinate list.
            corner_a = (arr[2], arr[1])
            corner_b = (arr[6], arr[5])
            orig = plot_rectangle(orig, corner_a, corner_b, confidence)
            print(corner_a, corner_b, confidence)

    # Blend the annotated image with the pristine copy so the boxes appear
    # semi-transparent.
    mode = cv2.addWeighted(orig, alpha, copy, 1 - alpha, 0)
    cv2.imshow('image', cv2.resize(mode, (1360, 768)))
    cv2.waitKey(0)
  

Write to file


              
def write_file(get_data,filename):
    """Write text to a file located under the ``docs`` directory.

    ``filename`` is joined onto ``'docs'`` with ``os.path.join``; when
    ``filename`` is absolute, the join discards ``'docs'`` and the absolute
    path is used as is. An existing file is overwritten.

    Args:
        get_data: Text to write.
        filename: Target file name or path.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    filename = os.path.join('docs', filename)
    # The context manager closes the handle even when write() raises.
    with open(filename, "w") as out_file:
        out_file.write(get_data)
              

Integrating all the components


# Run the whole pipeline on one image: OCR it, show the detected word boxes,
# then save the raw result text.
import ast

# Image to analyse, relative to the working directory. plot() displays a file
# of this same name, so the boxes line up with the picture that was analysed.
path = 'sign.jpg'

# Keep the result: it used to be discarded, leaving get_data undefined.
get_data = handler(os.path.join(dir, path))

# On success handler() returns the result dict rendered with str(); on
# failure it returns an error message (or None on time-out). Only a rendered
# dict can be turned back into the mapping plot() indexes into.
if isinstance(get_data, str) and get_data.startswith('{'):
    plot(ast.literal_eval(get_data))
else:
    print(get_data)

if get_data is not None:
    write_file(get_data, os.path.join(dir, 'jsondic.py'))